In [1]:
import sys
print sys.version
import numpy; print 'numpy', numpy.version.full_version
import scipy; print 'scipy', scipy.version.full_version
import numexpr; print 'numexpr', numexpr.version.version
import Cython; print Cython.__version__


2.7.3 |EPD 7.3-1 (64-bit)| (default, Apr 12 2012, 11:14:05) 
[GCC 4.0.1 (Apple Inc. build 5493)]
numpy 1.7.1
scipy 0.12.0
numexpr 2.1
0.19.1

test setup


In [2]:
import numpy.random
a = numpy.random.rand(20000).astype(np.float32)

scipy.misc.logsumexp


In [3]:
import scipy.misc
print(scipy.misc.logsumexp(a))
%timeit scipy.misc.logsumexp(a)


10.4467
1000 loops, best of 3: 379 µs per loop

scipy.weave


In [4]:
import scipy.weave

def lse_weave(u): # no timing difference taking it out of the function, ie leaving scipy.weave.inline() bare
    code = """
    
    int i;
    float r = 0;
    float largest_in_u = 0;
    for (i=0; i<Nu[0]; i++) {
           if (U1(i) > largest_in_u) {
               largest_in_u = U1(i);
               }
           }
    for (i=0; i<Nu[0]; i++) {
           r += exp(U1(i) - largest_in_u);
           }
    return_val = largest_in_u + log(r);
    """
    return scipy.weave.inline(code, ['u']) # no change using type_converters=converters.blitz

print(lse_weave(a))
%timeit lse_weave(a)


10.4467144498
1000 loops, best of 3: 363 µs per loop

numexpr


In [5]:
import numexpr

def lse_numexpr(a):
    largest_in_a = np.max(a)
    r = numexpr.evaluate("sum(exp(a - largest_in_a))")
    return largest_in_a + np.log(r) 
    # np.log(b) faster than numexpr.evaluate("log(r)") 
    # numexpr.evaluate("log(sum(exp(a)))") will return an error

print(lse_numexpr(a))
%timeit lse_numexpr(a)


10.4467
1000 loops, best of 3: 381 µs per loop

cython


In [6]:
%load_ext cythonmagic

In [7]:
%%cython
cimport cython
import numpy as np
cimport numpy as np
from libc.math cimport exp, log # 40x speedup using this instead of np.exp, np.log which result in python numpy calls

DTYPE=np.float32
ctypedef np.float32_t DTYPE_t

@cython.boundscheck(False)
@cython.wraparound(False)
cpdef lse_cython(np.ndarray[DTYPE_t, ndim=1] a) :
    cdef int i
    cdef double result = 0.0
    cdef double largest_in_a = a[0]
    for i in range(1,a.shape[0]):
        if (a[i] > largest_in_a):
            largest_in_a = a[i]
    for i in range(a.shape[0]):
        result += exp(a[i] - largest_in_a) 
    return largest_in_a + log(result)

In [8]:
print(lse_cython(a))
%timeit lse_cython(a)


10.4467162773
1000 loops, best of 3: 377 µs per loop

custom sse


In [9]:
import sselogsumexp
sselogsumexp.logsumexp(a)
%timeit sselogsumexp.logsumexp(a)


10000 loops, best of 3: 151 µs per loop

recap


In [10]:
print("%timeit scipy.misc.logsumexp(a)")
print(scipy.misc.logsumexp(a))
%timeit scipy.misc.logsumexp(a)

print(lse_weave(a))
print("%timeit lse_weave(a)")
%timeit lse_weave(a)

print(lse_numexpr(a))
print("%timeit lse_numexpr(a)")
%timeit lse_numexpr(a) 

print(lse_cython(a))
print("%timeit lse_cython(a)")
%timeit lse_cython(a)

print sselogsumexp.logsumexp(a)
print("%timeit sselogsumexp.logsumexp(a)")
%timeit sselogsumexp.logsumexp(a)


%timeit scipy.misc.logsumexp(a)
10.4467
1000 loops, best of 3: 363 µs per loop
10.4467144498
%timeit lse_weave(a)
1000 loops, best of 3: 352 µs per loop
10.4467
%timeit lse_numexpr(a)
1000 loops, best of 3: 360 µs per loop
10.4467162773
%timeit lse_cython(a)
1000 loops, best of 3: 361 µs per loop
10.4467163086
%timeit sselogsumexp.logsumexp(a)
10000 loops, best of 3: 149 µs per loop

In [10]: